# run.py
from scrapy.crawler import CrawlerProcess
from scrapy.utils.project import get_project_settings
from news_scraper.spiders.news_spider import NewsSpider
import os


def main():
    """Run ``NewsSpider`` and export its scraped items to a JSON feed.

    Loads the Scrapy project settings, makes sure the ``output`` directory
    exists, registers a JSON feed at ``output/articles.json`` and then
    starts a ``CrawlerProcess`` for the spider.

    Raises:
        OSError: If the output directory cannot be created (for example
            because a regular file with that name already exists).
    """
    # Load the project settings.
    settings = get_project_settings()

    # Make sure the output directory exists. ``exist_ok=True`` avoids the
    # check-then-create race of a separate ``os.path.exists`` test.
    output_dir = 'output'
    os.makedirs(output_dir, exist_ok=True)

    # Register the feed export. The path is derived from ``output_dir`` so
    # the directory created above and the feed location cannot drift apart.
    feed_path = f'{output_dir}/articles.json'
    settings.set('FEEDS', {
        feed_path: {
            'format': 'json',
            'encoding': 'utf8',
            'store_empty': False,
            'overwrite': True,
        },
    })

    # Create the crawler process and run the spider.
    process = CrawlerProcess(settings)
    process.crawl(NewsSpider)
    process.start()


# Run the crawl only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()